From a07f4bc4d3650b5022e447279abe8af7de16cf5c Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Fri, 1 Sep 2017 21:31:24 +0200 Subject: [PATCH] babl: move RGB space conversion code to babl-space.c --- babl/babl-fish-path.c | 395 +---------------------------------------- babl/babl-internal.h | 1 + babl/babl-space.c | 400 ++++++++++++++++++++++++++++++++++++++++++ babl/babl-trc.c | 71 +++++--- babl/babl-trc.h | 8 +- 5 files changed, 453 insertions(+), 422 deletions(-) diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c index 176f02f..48629be 100644 --- a/babl/babl-fish-path.c +++ b/babl/babl-fish-path.c @@ -399,397 +399,6 @@ alias_conversion (Babl *babl, return 0; } -static void prep_conversion (const Babl *babl) -{ - Babl *conversion = (void*) babl; - const Babl *source_space = babl_conversion_get_source_space (conversion); - float *matrixf; - int i; - float *lut; - - double matrix[9]; - babl_matrix_mul_matrix ( - (conversion->conversion.destination)->format.space->space.XYZtoRGB, - (conversion->conversion.source)->format.space->space.RGBtoXYZ, - matrix); - - matrixf = babl_calloc (sizeof (float), 9 + 256); // we leak this matrix , which is a singleton - babl_matrix_to_float (matrix, matrixf); - conversion->conversion.data = matrixf; - - lut = matrixf + 9; - for (i = 0; i < 256; i++) - { - lut[i] = babl_trc_to_linear (source_space->space.trc[0], i/255.0); - // XXX: should have green and blue luts as well - } -} - -static inline long -universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - const Babl *destination_space = babl_conversion_get_destination_space (conversion); - - float * matrixf = conversion->conversion.data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - { - int i; - for (i = 0; i < samples; i++) - { - rgba_out[i*4+3] = rgba_in[i*4+3]; - } - } - { - 
int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)source_space->space.trc[c]; - babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, samples); - } - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); - - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)destination_space->space.trc[c]; - babl_trc_from_linear_buf(trc, rgba_out + c, rgba_out + c, 4, 4, samples); - } - } - - return samples; -} - -static inline long -universal_nonlinear_rgb_linear_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - float * matrixf = conversion->conversion.data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - { - int i; - for (i = 0; i < samples; i++) - { - rgba_out[i*4+3] = rgba_in[i*4+3]; - } - } - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)source_space->space.trc[c]; - babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, samples); - } - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); - - return samples; -} - - -static inline long -universal_nonlinear_rgba_u8_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - - float * matrixf = conversion->conversion.data; - float * in_trc_lut = matrixf + 9; - int i; - uint8_t *rgba_in_u8 = (void*)src_char; - uint8_t *rgba_out_u8 = (void*)dst_char; - - float *rgb = aligned_alloc (16, sizeof(float) * 4 * samples); - - for (i = 0; i < samples; i++) - { - rgb[i*4+0]=in_trc_lut[rgba_in_u8[i*4+0]]; - rgb[i*4+1]=in_trc_lut[rgba_in_u8[i*4+1]]; - rgb[i*4+2]=in_trc_lut[rgba_in_u8[i*4+2]]; - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgb, rgb, samples); - - { - const Babl *from_trc_red = (void*)destination_space->space.trc[0]; - const Babl 
*from_trc_green = (void*)destination_space->space.trc[1]; - const Babl *from_trc_blue = (void*)destination_space->space.trc[2]; - for (i = 0; i < samples; i++) - { - rgba_out_u8[0] = babl_trc_from_linear (from_trc_red, rgb[i*4+0]) * 255.5f; - rgba_out_u8[1] = babl_trc_from_linear (from_trc_green, rgb[i*4+1]) * 255.5f; - rgba_out_u8[2] = babl_trc_from_linear (from_trc_blue, rgb[i*4+2]) * 255.5f; - rgba_out_u8[3] = rgba_in_u8[3]; - rgba_in_u8 += 4; - rgba_out_u8 += 4; - } - } - - return samples; -} - - -static inline long -universal_rgba_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - float *matrixf = conversion->conversion.data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); - - return samples; -} - -static inline long -universal_rgb_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - float *matrixf = conversion->conversion.data; - float *rgb_in = (void*)src_char; - float *rgb_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples); - - return samples; -} - -#if defined(USE_SSE2) - -#define m(matr, j, i) matr[j*3+i] - -#include <emmintrin.h> - -static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat, - const float *v_in, - float *v_out, - int samples) -{ - const __v4sf m___0 = {m(mat, 0, 0), m(mat, 1, 0), m(mat, 2, 0), 0}; - const __v4sf m___1 = {m(mat, 0, 1), m(mat, 1, 1), m(mat, 2, 1), 0}; - const __v4sf m___2 = {m(mat, 0, 2), m(mat, 1, 2), m(mat, 2, 2), 0}; - int i; - for (i = 0; i < samples; i ++) - { - __v4sf a, b, c = _mm_load_ps(&v_in[0]); - a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0)); - b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1)); - c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(2,2,2,2)); - _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c); - v_out[3] = v_in[3];
- v_out += 4; - v_in += 4; - } - _mm_empty (); -} - -#undef m - -static inline long -universal_nonlinear_rgb_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - const Babl *destination_space = babl_conversion_get_destination_space (conversion); - float * matrixf = conversion->conversion.data; - int i; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)source_space->space.trc[c]; - babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, samples); - } - } - for (i = 0; i < samples; i++) - { - rgba_out[i*4+3]=rgba_in[3]; - } - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)destination_space->space.trc[c]; - babl_trc_from_linear_buf(trc, rgba_out + c, rgba_out + c, 4, 4, samples); - } - } - return samples; -} - - -static inline long -universal_rgba_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - float *matrixf = conversion->conversion.data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); - - return samples; -} - -static inline long -universal_nonlinear_rgba_u8_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - - float * matrixf = conversion->conversion.data; - float * in_trc_lut = matrixf + 9; - int i; - uint8_t *rgba_in_u8 = (void*)src_char; - uint8_t *rgba_out_u8 = (void*)dst_char; - - float *rgb = aligned_alloc (16, sizeof(float) * 4 * samples); - - for (i = 0; i < samples; i++) - { - rgb[i*4+0]=in_trc_lut[rgba_in_u8[i*4+0]]; - 
rgb[i*4+1]=in_trc_lut[rgba_in_u8[i*4+1]]; - rgb[i*4+2]=in_trc_lut[rgba_in_u8[i*4+2]]; - rgba_out_u8[i*4+3] = rgba_in_u8[i*4+3]; - } - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgb, rgb, samples); - - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)destination_space->space.trc[0]; - babl_trc_from_linear_buf(trc, rgb + c, rgb + c, 4, 4, samples); - } - - /* XXX: this is a prime candidate for sseification */ - for (i = 0; i < samples; i++) - for (c = 0; c < 3; c ++) - rgba_out_u8[i*4+c] = rgb[i*4+c] * 255.5f; - } - - return samples; -} - -static inline long -universal_nonlinear_rgb_linear_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - float * matrixf = conversion->conversion.data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - { - int i; - for (i = 0; i < samples; i++) - { - rgba_out[i*4+3] = rgba_in[i*4+3]; - } - } - { - int c; - for (c = 0; c < 3; c ++) - { - const Babl *trc = (void*)source_space->space.trc[c]; - babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, samples); - } - } - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - - return samples; -} -#endif - - -static int -add_rgb_adapter (Babl *babl, - void *space) -{ - if (babl != space) - { - -#if defined(USE_SSE2) - if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE) && - (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2)) - { - prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_rgba_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_rgba_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", 
space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_nonlinear_rgb_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_nonlinear_rgb_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", space), - babl_format_with_space("R'G'B'A u8", babl), - "linear", universal_nonlinear_rgba_u8_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", babl), - babl_format_with_space("R'G'B'A u8", space), - "linear", universal_nonlinear_rgba_u8_converter_sse2, - NULL)); - } - else -#endif - { - prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_rgba_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_rgba_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_nonlinear_rgb_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_nonlinear_rgb_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", space), - babl_format_with_space("R'G'B'A u8", babl), - "linear", universal_nonlinear_rgba_u8_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", babl), - babl_format_with_space("R'G'B'A u8", space), - "linear", universal_nonlinear_rgba_u8_converter, - NULL)); - } - - prep_conversion(babl_conversion_new(babl_format_with_space("RGB float", space), - babl_format_with_space("RGB float", babl), - "linear", 
universal_rgb_converter, - NULL)); - prep_conversion(babl_conversion_new(babl_format_with_space("RGB float", babl), - babl_format_with_space("RGB float", space), - "linear", universal_rgb_converter, - NULL)); - } - return 0; -} - -static void add_universal_rgb (const Babl *space) -{ - babl_space_class_for_each (add_rgb_adapter, (void*)space); -} - Babl * babl_fish_path (const Babl *source, const Babl *destination) @@ -829,14 +438,14 @@ babl_fish_path (const Babl *source, run_once[i++] = source->format.space; babl_conversion_class_for_each (alias_conversion, (void*)source->format.space); - add_universal_rgb (source->format.space); + _babl_space_add_universal_rgb (source->format.space); } if ((done & 2) == 0 && (destination->format.space != source->format.space) && (destination->format.space != sRGB)) { run_once[i++] = destination->format.space; babl_conversion_class_for_each (alias_conversion, (void*)destination->format.space); - add_universal_rgb (destination->format.space); + _babl_space_add_universal_rgb (destination->format.space); } if (!done && 0) diff --git a/babl/babl-internal.h b/babl/babl-internal.h index 9ddb9c1..440f7e8 100644 --- a/babl/babl-internal.h +++ b/babl/babl-internal.h @@ -361,5 +361,6 @@ int babl_list_destroy (void *data); const char * babl_conversion_create_name (Babl *source, Babl *destination, int is_reference); +void _babl_space_add_universal_rgb (const Babl *space); #endif diff --git a/babl/babl-space.c b/babl/babl-space.c index beecf3c..77fc1ec 100644 --- a/babl/babl-space.c +++ b/babl/babl-space.c @@ -389,3 +389,403 @@ const double * babl_space_get_rgbtoxyz (const Babl *space) { return space->space.RGBtoXYZ; } + +/////////////////// + + +static void prep_conversion (const Babl *babl) +{ + Babl *conversion = (void*) babl; + const Babl *source_space = babl_conversion_get_source_space (conversion); + float *matrixf; + int i; + float *lut; + + double matrix[9]; + babl_matrix_mul_matrix ( + 
(conversion->conversion.destination)->format.space->space.XYZtoRGB, + (conversion->conversion.source)->format.space->space.RGBtoXYZ, + matrix); + + matrixf = babl_calloc (sizeof (float), 9 + 256); // we leak this matrix , which is a singleton + babl_matrix_to_float (matrix, matrixf); + conversion->conversion.data = matrixf; + + lut = matrixf + 9; + for (i = 0; i < 256; i++) + { + lut[i] = babl_trc_to_linear (source_space->space.trc[0], i/255.0); + // XXX: should have green and blue luts as well + } +} + +static inline long +universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + const Babl *destination_space = babl_conversion_get_destination_space (conversion); + + float * matrixf = conversion->conversion.data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + { + int i; + for (i = 0; i < samples; i++) + { + rgba_out[i*4+3] = rgba_in[i*4+3]; + } + } + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)source_space->space.trc[c]; + babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); + } + } + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); + + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)destination_space->space.trc[c]; + babl_trc_from_linear_buf(trc, rgba_out + c, rgba_out + c, 4, 4, 1, samples); + } + } + + return samples; +} + +static inline long +universal_nonlinear_rgb_linear_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + float * matrixf = conversion->conversion.data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + { + int i; + for (i = 0; i < samples; i++) + { + rgba_out[i*4+3] = rgba_in[i*4+3]; + } + } + { + int c; + for (c = 0; c < 3; c ++) + 
{ + const Babl *trc = (void*)source_space->space.trc[c]; + babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); + } + } + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); + + return samples; +} + + +static inline long +universal_nonlinear_rgba_u8_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + + float * matrixf = conversion->conversion.data; + float * in_trc_lut = matrixf + 9; + int i; + uint8_t *rgba_in_u8 = (void*)src_char; + uint8_t *rgba_out_u8 = (void*)dst_char; + + float *rgb = aligned_alloc (16, sizeof(float) * 4 * samples); + + for (i = 0; i < samples; i++) + { + rgb[i*4+0]=in_trc_lut[rgba_in_u8[i*4+0]]; + rgb[i*4+1]=in_trc_lut[rgba_in_u8[i*4+1]]; + rgb[i*4+2]=in_trc_lut[rgba_in_u8[i*4+2]]; + } + + babl_matrix_mul_vectorff_buf4 (matrixf, rgb, rgb, samples); + + { + const Babl *from_trc_red = (void*)destination_space->space.trc[0]; + const Babl *from_trc_green = (void*)destination_space->space.trc[1]; + const Babl *from_trc_blue = (void*)destination_space->space.trc[2]; + for (i = 0; i < samples; i++) + { + rgba_out_u8[0] = babl_trc_from_linear (from_trc_red, rgb[i*4+0]) * 255.5f; + rgba_out_u8[1] = babl_trc_from_linear (from_trc_green, rgb[i*4+1]) * 255.5f; + rgba_out_u8[2] = babl_trc_from_linear (from_trc_blue, rgb[i*4+2]) * 255.5f; + rgba_out_u8[3] = rgba_in_u8[3]; + rgba_in_u8 += 4; + rgba_out_u8 += 4; + } + } + + return samples; +} + + +static inline long +universal_rgba_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + float *matrixf = conversion->conversion.data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); + + return samples; +} + +static inline long +universal_rgb_converter (const Babl *conversion,unsigned char 
*src_char, unsigned char *dst_char, long samples) +{ + float *matrixf = conversion->conversion.data; + float *rgb_in = (void*)src_char; + float *rgb_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples); + + return samples; +} + +#if defined(USE_SSE2) + +#define m(matr, j, i) matr[j*3+i] + +#include <emmintrin.h> + +static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat, + const float *v_in, + float *v_out, + int samples) +{ + const __v4sf m___0 = {m(mat, 0, 0), m(mat, 1, 0), m(mat, 2, 0), 0}; + const __v4sf m___1 = {m(mat, 0, 1), m(mat, 1, 1), m(mat, 2, 1), 0}; + const __v4sf m___2 = {m(mat, 0, 2), m(mat, 1, 2), m(mat, 2, 2), 0}; + int i; + for (i = 0; i < samples; i ++) + { + __v4sf a, b, c = _mm_load_ps(&v_in[0]); + a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0)); + b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1)); + c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(2,2,2,2)); + _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c); + v_out[3] = v_in[3]; + v_out += 4; + v_in += 4; + } + _mm_empty (); +} + +#undef m + +static inline long +universal_nonlinear_rgb_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + const Babl *destination_space = babl_conversion_get_destination_space (conversion); + float * matrixf = conversion->conversion.data; + int i; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)source_space->space.trc[c]; + babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); + } + } + for (i = 0; i < samples; i++) + { + rgba_out[i*4+3]=rgba_in[3]; + } + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)destination_space->space.trc[c]; +
babl_trc_from_linear_buf(trc, rgba_out + c, rgba_out + c, 4, 4, 1, samples); + } + } + return samples; +} + + +static inline long +universal_rgba_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + float *matrixf = conversion->conversion.data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); + + return samples; +} + +static inline long +universal_nonlinear_rgba_u8_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + + float * matrixf = conversion->conversion.data; + float * in_trc_lut = matrixf + 9; + int i; + uint8_t *rgba_in_u8 = (void*)src_char; + uint8_t *rgba_out_u8 = (void*)dst_char; + + float *rgb = aligned_alloc (16, sizeof(float) * 4 * samples); + + for (i = 0; i < samples; i++) + { + rgb[i*4+0]=in_trc_lut[rgba_in_u8[i*4+0]]; + rgb[i*4+1]=in_trc_lut[rgba_in_u8[i*4+1]]; + rgb[i*4+2]=in_trc_lut[rgba_in_u8[i*4+2]]; + rgba_out_u8[i*4+3] = rgba_in_u8[i*4+3]; + } + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgb, rgb, samples); + + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)destination_space->space.trc[0]; + babl_trc_from_linear_buf(trc, rgb + c, rgb + c, 4, 4, 1, samples); + } + + /* XXX: this is a prime candidate for sseification */ + for (i = 0; i < samples; i++) + for (c = 0; c < 3; c ++) + rgba_out_u8[i*4+c] = rgb[i*4+c] * 255.5f; + } + + return samples; +} + +static inline long +universal_nonlinear_rgb_linear_converter_sse2 (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + float * matrixf = conversion->conversion.data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + { + int i; + for (i = 0; 
i < samples; i++) + { + rgba_out[i*4+3] = rgba_in[i*4+3]; + } + } + { + int c; + for (c = 0; c < 3; c ++) + { + const Babl *trc = (void*)source_space->space.trc[c]; + babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); + } + } + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); + + return samples; +} +#endif + + +static int +add_rgb_adapter (Babl *babl, + void *space) +{ + if (babl != space) + { + +#if defined(USE_SSE2) + if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE) && + (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2)) + { + prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_rgba_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_rgba_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_nonlinear_rgb_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_nonlinear_rgb_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", space), + babl_format_with_space("R'G'B'A u8", babl), + "linear", universal_nonlinear_rgba_u8_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", babl), + babl_format_with_space("R'G'B'A u8", space), + "linear", universal_nonlinear_rgba_u8_converter_sse2, + NULL)); + } + else +#endif + { + prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_rgba_converter, + NULL)); + 
prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_rgba_converter, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_nonlinear_rgb_converter, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_nonlinear_rgb_converter, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", space), + babl_format_with_space("R'G'B'A u8", babl), + "linear", universal_nonlinear_rgba_u8_converter, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A u8", babl), + babl_format_with_space("R'G'B'A u8", space), + "linear", universal_nonlinear_rgba_u8_converter, + NULL)); + } + + prep_conversion(babl_conversion_new(babl_format_with_space("RGB float", space), + babl_format_with_space("RGB float", babl), + "linear", universal_rgb_converter, + NULL)); + prep_conversion(babl_conversion_new(babl_format_with_space("RGB float", babl), + babl_format_with_space("RGB float", space), + "linear", universal_rgb_converter, + NULL)); + } + return 0; +} + +/* This function is called the first time a new Babl space is used for the + * creation of a fish. It adds conversion hooks that provide the formats of + * the space with conversions internally, as well as to and from other RGB + * spaces.
+ */ +void _babl_space_add_universal_rgb (const Babl *space) +{ + babl_space_class_for_each (add_rgb_adapter, (void*)space); +} + diff --git a/babl/babl-trc.c b/babl/babl-trc.c index e22a71a..81b0153 100644 --- a/babl/babl-trc.c +++ b/babl/babl-trc.c @@ -350,23 +350,24 @@ static inline float _babl_trc_gamma_to_linear (const Babl *trc_, float value) } -static inline void _babl_trc_gamma_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int count) +static inline void _babl_trc_gamma_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int components, int count) { BablTRC *trc = (void*)trc_; float gamma = trc->gamma; - int i; + int i, c; for (i = 0; i < count; i ++) - out[out_gap * i] = babl_powf (in[in_gap *i], gamma); + for (c = 0; c < components; c ++) + out[out_gap * i + c] = babl_powf (in[in_gap *i + c], gamma); } - -static inline void _babl_trc_gamma_from_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int count) +static inline void _babl_trc_gamma_from_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int components, int count) { BablTRC *trc = (void*)trc_; float gamma = trc->rgamma; - int i; + int i, c; for (i = 0; i < count; i ++) - out[out_gap * i] = babl_powf (in[in_gap *i], gamma); + for (c = 0; c < components; c ++) + out[out_gap * i + c] = babl_powf (in[in_gap *i + c], gamma); } static inline float _babl_trc_gamma_from_linear (const Babl *trc_, float value) @@ -498,90 +499,106 @@ static inline float _babl_trc_srgb_from_linear (const Babl *trc_, float value) return babl_linear_to_gamma_2_2f (value); } -static inline void _babl_trc_srgb_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int count) +static inline void _babl_trc_srgb_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int components, int count) { - int i; + int i, c; for (i = 0; i < 
count; i ++) - out[out_gap * i] = babl_gamma_2_2_to_linearf (in[in_gap * i]); + for (c = 0; c < components; c++) + out[out_gap * i + c] = babl_gamma_2_2_to_linearf (in[in_gap * i + c]); } static inline void _babl_trc_srgb_from_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[out_gap * i] = babl_linear_to_gamma_2_2f (in[in_gap * i]); + for (c = 0; c < components; c++) + out[out_gap * i + c] = babl_linear_to_gamma_2_2f (in[in_gap * i + c]); } -static inline void _babl_trc_to_linear_buf_generic (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int count) +static inline void _babl_trc_to_linear_buf_generic (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, int components, int count) { - int i; + int i, c; BablTRC *trc = (void*)trc_; for (i = 0; i < count; i ++) - out[out_gap * i] = trc->fun_to_linear (trc_, in[in_gap * i]); + for (c = 0; c < components; c ++) + out[out_gap * i + c] = trc->fun_to_linear (trc_, in[in_gap * i + c]); } static inline void _babl_trc_from_linear_buf_generic (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; BablTRC *trc = (void*)trc_; for (i = 0; i < count; i ++) - out[out_gap * i] = trc->fun_from_linear (trc_, in[in_gap * i]); + for (c = 0; c < components; c ++) + out[out_gap * i + c] = trc->fun_from_linear (trc_, in[in_gap * i + c]); } static inline void _babl_trc_gamma_1_8_from_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[i * out_gap] = _babl_trc_gamma_1_8_from_linear (trc_, in[i * in_gap]); + for (c = 0; c < components; c ++) + out[i * out_gap + c] = _babl_trc_gamma_1_8_from_linear (trc_, in[i * in_gap + c]); } static inline void _babl_trc_gamma_2_2_from_linear_buf (const Babl 
*trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[i * out_gap] = _babl_trc_gamma_2_2_from_linear (trc_, in[i * in_gap]); + for (c = 0; c < components; c ++) + out[i * out_gap + c] = _babl_trc_gamma_2_2_from_linear (trc_, in[i * in_gap + c]); } static inline void _babl_trc_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[i * out_gap] = in[i * in_gap]; + for (c = 0; c < components; c ++) + out[i * out_gap + c] = in[i * in_gap + c]; } static inline void _babl_trc_gamma_1_8_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[i * out_gap] = _babl_trc_gamma_1_8_to_linear (trc_, in[i * in_gap]); + for (c = 0; c < components; c ++) + out[i * out_gap + c] = _babl_trc_gamma_1_8_to_linear (trc_, in[i * in_gap + c]); } static inline void _babl_trc_gamma_2_2_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { - int i; + int i, c; for (i = 0; i < count; i ++) - out[i * out_gap] = _babl_trc_gamma_2_2_to_linear (trc_, in[i * in_gap]); + for (c = 0; c < components; c ++) + out[i * out_gap + c] = _babl_trc_gamma_2_2_to_linear (trc_, in[i * in_gap + c]); } diff --git a/babl/babl-trc.h b/babl/babl-trc.h index 4dafd04..0b7fb70 100644 --- a/babl/babl-trc.h +++ b/babl/babl-trc.h @@ -48,12 +48,14 @@ typedef struct float *out, int in_gap, int out_gap, + int components, int count); void (*fun_from_linear_buf)(const Babl *trc, const float *in, float *out, int in_gap, int out_gap, + int components, int count); float *lut; float *inv_lut; @@ -63,19 +65,21 @@ typedef struct static inline void babl_trc_from_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int 
components, int count) { BablTRC *trc = (void*)trc_; - trc->fun_from_linear_buf (trc_, in, out, in_gap, out_gap, count); + trc->fun_from_linear_buf (trc_, in, out, in_gap, out_gap, components, count); } static inline void babl_trc_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, + int components, int count) { BablTRC *trc = (void*)trc_; - trc->fun_to_linear_buf (trc_, in, out, in_gap, out_gap, count); + trc->fun_to_linear_buf (trc_, in, out, in_gap, out_gap, components, count); } static inline float babl_trc_from_linear (const Babl *trc_, float value) -- 2.30.2